import jieba

def read_text(filename):
    """Return the contents of *filename* with letters and punctuation removed.

    The file is decoded as UTF-8.  Every character listed in ``removed``
    (ASCII letters, ASCII and full-width punctuation, the backslash,
    newline and tab) is deleted; all other characters, such as CJK
    characters and digits, are kept in their original order.

    Args:
        filename: Path of the text file to read.

    Returns:
        The filtered text as a single string.
    """
    removed = (
        "abcdefghijklmnopqrstuvwxyz"
        "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
        # The backslash is escaped explicitly; a bare backslash before "{"
        # is an invalid escape sequence.
        "~!@#$%^&*()_+=-`[]\\{}|:\";',./<>?，。、 "
        "《》？；’：”“【】、｛｝§=-+——）（*&……%￥#@！～·\n\t"
    )
    # Use a context manager so the file handle is closed deterministically.
    with open(filename, "rt", encoding="utf-8") as f:
        text = f.read()
    # One translate pass deletes every unwanted character, instead of one
    # full-text replace() pass per character.
    return text.translate(str.maketrans("", "", removed))

def stat_char(text):
    """Count how many times each character occurs in *text*.

    Args:
        text: The string whose characters are counted.

    Returns:
        A list of ``(character, count)`` tuples in order of first
        appearance.  An empty *text* yields an empty list.
    """
    chars = {}
    for c in text:
        chars[c] = chars.get(c, 0) + 1
    # Build the result once, after the loop: this avoids re-creating the
    # list for every character and keeps the name defined for empty input.
    return list(chars.items())

# --- Script: character statistics and word-frequency ranking ---------------
filename="三体.txt"

# Character-level statistics on the text with letters/punctuation stripped.
text=read_text(filename)
conclusion=stat_char(text)

# Words dropped from the frequency ranking before printing the top entries.
excludes={"一个","没有","他们","我们","这个","自己","现在","已经","世界","什么","可能","看到","看到","知道","地球","太空","人类","三体","可以","宇宙","就是","太阳","这样","不是","你们",
"那个","飞船","舰队","只是","这种","出现","如果","时间","两个","文明","这里","开始","最后","一样","起来","只有","东西","发现","这些","进行","这是","信息","还是","它们","感觉","计划","智子",
"一种","然后","...","一切","看着","这时","很快","太阳系","还有","人们","真的","所有","面壁","那些","空间","技术","光速","存在","任何","需要","应该","一直","研究","消失","因为","世纪","行星","AA","北海",
"当然","能够","只能","完全","问题","恒星","一些","发出","变成","产生","......","一次","水滴","天明","目光","同时","这么","系统","生活","孩子","所以","出来","甚至","二维","进入","工作",
"黑暗","城市","方向","不同","突然","一片","成为","巨大","发射","声音","发生","不能","三个","认为","位置","显示","冬眠","时代","周围","怎么","不过","不会","大史","那里","由于",
"其他","的话","其实","处于","状态","速度","正在","看看","肯定","其中","几乎","一起","目标","仿佛","感到","社会","一下","那么","地方","许多","思想","以前","有些","后来","地面",
"立刻","通过","即使","眼睛","似乎","整个","战舰","距离","来自","继续","轨道","回答","以后","她们","那样","公主","下来","生命","纪元","想象","蓝色","有人","好像","虽然","注意","为了","对于",
"得到","听到","加速","再次","过去","点点头","那时","显然","渐渐","运行","告诉","希望","小时","第一次","外面","木星","基地","威慑","森林","看上去","也许","无法","目前","不要","一条","像是",
"部分","一点","离开","表面","一部分","仍然","情况","星星","而是","科学","大部分","一般","不到","首先","一颗","星环","这次","变得","形成","完成","作为","太空城","建立","联合国","一只"}

# Read the raw (unfiltered) text for word segmentation; the context manager
# closes the file handle as soon as the contents are loaded.
with open(filename,"r",encoding="utf-8") as source:
    txt=source.read()
words=jieba.lcut(txt)

# Tally every token except those that are exactly one character long.
counts={}
for word in words:
    if len(word)==1:
        continue
    counts[word]=counts.get(word,0)+1

# pop() with a default tolerates excluded words that never occur in the
# text; a plain `del` would raise KeyError for them.
for word in excludes:
    counts.pop(word,None)

# Rank the remaining words by descending frequency.
items=list(counts.items())
items.sort(key=lambda x:x[1],reverse=True)

print("总字数：", len(text))
print("用字数：", len(conclusion))
print("人物出场统计")
# Slice instead of indexing range(10) so a text with fewer than ten
# remaining words does not raise IndexError.
for word,count in items[:10]:
    print(word.ljust(3),repr(count).rjust(10))